#!/usr/bin/python
# -*- coding:utf-8 -*-
# @author  : micah
# @time    : 2023/12/2 17:44
# @function: Scrape game names and URLs from 4399; the data is static page content
# @version :


import json
import requests
from lxml import etree
from bs4 import BeautifulSoup

# Listing page to scrape and the browser-like header sent with the request.
url = 'https://www.4399.com/flash/'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the game listing.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
# gb18030 is a superset of gb2312, so valid gb2312 bytes decode identically,
# while GBK-only characters no longer raise UnicodeDecodeError. Any byte
# sequence that is still invalid is replaced instead of aborting the run.
response = page.content.decode('gb18030', errors='replace')
# print(response)
"""xpath"""
# html = etree.HTML(response)
# li_list = html.xpath('//div[@class="bre oh"]/ul/li')
# # print(li_list)
# data_list = []
# for li in li_list:
#     item = {'title': li.xpath('./a/b/text()')[0],
#             'href': li.xpath('./a/@href')[0]}
#     data_list.append(item)
#
# with open('4399.json', 'w', encoding='utf-8')as f:
#     # json.dumps escapes non-ASCII characters by default, so set ensure_ascii=False; indent pretty-prints the JSON
#     f.write(json.dumps(data_list, ensure_ascii=False, indent=2))
"""bs4"""
# Parse the decoded HTML and collect one {'game_name', 'game_url'} dict per
# <li> inside the <ul class="n-game cf"> game list.
soup = BeautifulSoup(response, 'lxml')
game_container = soup.find('ul', class_='n-game cf')
if game_container is None:
    # Fail with a clear message instead of an opaque AttributeError on None
    # when the page layout changes or an unexpected page was returned.
    raise RuntimeError("game list <ul class='n-game cf'> not found in page")
game_list = game_container.find_all('li')

game_info = []

for game in game_list:
    name_tag = game.find('b')
    link_tag = game.find('a')
    game_url = link_tag.get('href') if link_tag is not None else None
    # Skip list items that lack a name or a link (e.g. ad/placeholder
    # entries) rather than crashing the whole run on a single bad <li>.
    if name_tag is None or game_url is None:
        continue
    # Site-relative links under /flash are made absolute; every other href
    # is stored exactly as it appears in the page.
    if game_url.startswith('/flash'):
        game_url = 'https://www.4399.com' + game_url
    game_info.append({
        'game_name': name_tag.text,
        'game_url': game_url,
    })


# Serialize the collected entries as indented JSON; ensure_ascii=False keeps
# non-ASCII game names as readable text instead of \uXXXX escapes.
serialized_games = json.dumps(game_info, indent=4, ensure_ascii=False)
with open('game_info.json', mode='w', encoding='utf-8') as out_file:
    out_file.write(serialized_games)




